from bs4 import BeautifulSoup
import requests
import pymysql
import datetime
import time
from fake_useragent import UserAgent
import util
ua = UserAgent()  # NOTE(review): unused — the request headers below are hard-coded; kept so module behavior is unchanged

source = "anjuke"
# NOTE(review): credentials are hard-coded in source — move to env vars or a config file.
db = pymysql.connect(
    host="172.17.9.105",
    user="cs_platform",
    password="cs_platfm_01",
    database="icrg_test",
)
c = db.cursor()
# Remove today's rows for this source so a re-run does not duplicate them.
# Parameterized query instead of f-string interpolation: the driver handles
# quoting/escaping (both values are trusted here, but this is the safe habit).
c.execute(
    "delete from h_p where source=%s and crawl_date = %s",
    (source, datetime.date.today()),
)

# Insert template for scraped rows; crawl_date is filled by MySQL's now() at insert time.
sql = "insert into h_p(position,avenue,price,source,crawl_date) values(%s,%s,%s,%s,now())"

# Second-hand-sale listing pages for Wuhan; %s is the 1-based page number.
base_url = "https://wuhan.anjuke.com/sale/p%s"

# Static desktop-Chrome User-Agent (the fake_useragent instance created above is not used).
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
}
datas = []  # every row scraped this run (kept for the commented-out dump at the end)
start = time.time()
for i in range(1, 51):
    url = base_url % i
    # timeout added so a single stalled request cannot hang the whole crawl
    response = requests.get(url, headers=headers, timeout=10)
    bs = BeautifulSoup(response.text, "html.parser")
    ul_list = bs.select("div[class='property']")
    # select() always returns a list, so the original `None != ul_list` check was
    # always True; truthiness reports whether the page actually had listings.
    print(f"第{i}页,有无内容:{bool(ul_list)}")
    page_rows = []  # rows parsed from this page only
    for ul in ul_list:
        try:
            p = ul.select("p[class='property-content-info-comm-name']")[0].get_text().strip()
            a = ul.select("p[class='property-content-info-comm-address']>span")[1].get_text().strip()
            price = ul.select("p[class='property-price-average']")[0].get_text().strip()
        except IndexError:
            # Cards missing the expected fields (e.g. ad placements) are skipped
            # instead of crashing the entire run.
            continue
        page_rows.append((util.clean_text(p), util.clean_text(a), price.replace('元/㎡', ''), source))
    # BUG FIX: the original called executemany on the ever-growing `datas` list
    # inside the loop, re-inserting every previous page's rows on each iteration
    # (duplicates + quadratic insert volume). Insert only this page's rows.
    c.executemany(sql, page_rows)
    datas.extend(page_rows)
    db.commit()
db.close()
end = time.time()
print(end - start)  # total crawl time in seconds
# print(datas)
